
import pandas as pd

# Load the three Titanic CSV files: the training set, the test set and the
# submission template.
train_df = pd.read_csv('./Project/dataset/titanic/train.csv')
test_df = pd.read_csv('./Project/dataset/titanic/test.csv')
predict_df = pd.read_csv('./Project/dataset/titanic/gender_submission.csv')

# Dump each frame to stdout under its own banner, in loading order.
for _banner, _frame in (
    ("\n\n--------------------------训练集 train 的内容：\n\n", train_df),
    ("\n\n--------------------------测试集 test  的内容：\n\n", test_df),
    ("\n\n--------------------------提交结果predict模板: \n\n", predict_df),
):
    print(_banner, _frame)


import matplotlib.pyplot as plt
# Plot-wide font setup: select the SimHei font so Chinese labels render, and
# disable the Unicode minus glyph so negative signs display correctly.
plt.rcParams.update({
    'font.sans-serif': [u'simHei'],
    'axes.unicode_minus': False,
})


# Figure 1: three bar charts laid out in one row and three columns, showing
# the passenger count for each value of Survived, Pclass and Embarked.
fig = plt.figure()
fig.set_alpha(0.5)  # figure-level alpha

# (column to count, chart title) for each panel, left to right.
_panels = (
    ("Survived", u"获救情况（1为获救）"),
    ("Pclass", "乘客等级分布图"),
    ("Embarked", u"各登船口岸上船人数分布图"),
)
for _col, (_column, _title) in enumerate(_panels):
    plt.subplot2grid((1, 3), (0, _col))
    train_df[_column].value_counts().plot(kind="bar")
    plt.title(_title)
    plt.ylabel(u"人数")
plt.show()


# Figure 2: age against survival as a scatter plot (left third), and the age
# density of each passenger class as KDE curves (right two thirds).
fig = plt.figure()
fig.set(alpha = 0.5)

plt.subplot2grid((1,3),(0,0))
plt.scatter(train_df.Survived,train_df.Age)
plt.ylabel(u"年龄")
# Pass the on/off flag positionally: the keyword was named ``b`` in older
# Matplotlib releases and ``visible`` in newer ones, so the positional form is
# the only spelling that works across versions.
plt.grid(True,which='major',axis='y')
plt.title(u"按年龄看获救分布（1为获救）")

# Age density per passenger class; the curves are drawn in class order 1, 2, 3
# so they line up with the legend entries below.
plt.subplot2grid((1,3),(0,1),colspan=2)
for pclass in (1, 2, 3):
    train_df.Age[train_df.Pclass == pclass].plot(kind = "kde")
plt.xlabel(u"年龄")
plt.ylabel(u"密度")
plt.title(u"各等级的乘客年龄分布")
plt.legend((u'头等舱',u'2等舱',u'3等舱'),loc = "best")
plt.show()


# Survival outcome per passenger class, drawn as a stacked bar chart:
# one bar per class, split into survived / not survived.
Survived_0 = train_df.loc[train_df.Survived == 0, "Pclass"].value_counts()
Survived_1 = train_df.loc[train_df.Survived == 1, "Pclass"].value_counts()
df = pd.DataFrame({u'获救': Survived_1, u'未获救': Survived_0})
df.plot.bar(stacked=True)
plt.ylabel(u"人数")
plt.xlabel(u"乘客等级")
plt.title(u'各乘客等级获救的情况')
plt.show()
plt.close()


# Survival outcome per sex, drawn as a stacked bar chart: one bar per sex,
# split by the value of Survived.
fig = plt.figure()
fig.set(alpha = 0.2)
Survived_m = train_df.Survived[train_df.Sex == 'male'].value_counts()
Survived_f = train_df.Survived[train_df.Sex == 'female'].value_counts()
# Transpose so the rows (and therefore the x-axis) are the sexes, matching the
# "性别" axis label; untransposed, the x-axis would be the Survived values.
df = pd.DataFrame({u'男':Survived_m, u'女':Survived_f}).transpose()
# Draw on the figure created above. Without ``ax`` DataFrame.plot opens a
# figure of its own and the one above is shown as an empty window.
df.plot(kind = "bar",stacked = True,ax = fig.add_subplot(111))
plt.title(u'按性别看获救的情况')
plt.xlabel(u"性别")
plt.ylabel(u"人数")
plt.show()


# Survival counts split by sex and cabin class (class 3 versus the other
# classes), drawn as four bar charts that share one y-axis.
fig = plt.figure()
fig.set(alpha = 0.5)
# Figure-level title: calling plt.title() before any subplot exists would
# implicitly create an extra full-size axes underneath the four panels.
fig.suptitle(u"根据舱等级和性别看获救情况")

is_third_class = train_df.Pclass == 3
# (subplot position, sex, third class?, series label, bar colour, legend text)
panels = (
    (141, 'female', False, "female highclass", "#FA2479", u"女性/高级舱"),
    (142, 'female', True, 'female,low class', "pink", u"女性/低级舱"),
    (143, 'male', False, "male,high class", "lightblue", u"男性/高级舱"),
    (144, 'male', True, 'male low class', "steelblue", u"男性/低级舱"),
)
axes = []
for position, sex, third_class, label, color, legend_text in panels:
    # Every panel after the first shares the first panel's y-axis.
    ax = fig.add_subplot(position, sharey = axes[0] if axes else None)
    # Build one combined mask aligned with train_df instead of chaining two
    # boolean lookups, where the second mask no longer matches the index of
    # the already-filtered Series.
    class_mask = is_third_class if third_class else ~is_third_class
    selected = (train_df.Sex == sex) & class_mask
    # value_counts() orders by frequency, so the bar order would depend on the
    # data (and be ambiguous on ties). Pin it to 0, 1 so the tick labels below
    # are always attached to the right bars.
    counts = train_df.Survived[selected].value_counts().reindex([0, 1], fill_value = 0)
    counts.plot(kind = "bar", label = label, color = color, ax = ax)
    ax.set_xticklabels([u"未获救", u"获救"], rotation = 0)
    ax.legend([legend_text], loc = "best")
    axes.append(ax)
# Keep the original per-panel names available to any later code.
ax1, ax2, ax3, ax4 = axes
plt.show()
plt.close()


# Survival outcome per port of embarkation, drawn as a stacked bar chart:
# one bar per Embarked value, split into survived / not survived.
fig = plt.figure()
fig.set(alpha = 0.2)
Survived_0 = train_df.Embarked[train_df.Survived == 0].value_counts()
Survived_1 = train_df.Embarked[train_df.Survived == 1].value_counts()
df = pd.DataFrame({u"获救":Survived_1, u"未获救":Survived_0})
# ``stacked`` is a boolean flag; the string "True" only worked by being truthy.
# Draw on the figure created above: without ``ax`` DataFrame.plot opens a
# figure of its own and the one above is shown as an empty window.
df.plot(kind = "bar",stacked = True,ax = fig.add_subplot(111))
plt.title(u"各港口乘客的获救情况")
plt.xlabel(u"登录港口")
plt.ylabel(u"人数")
plt.show()
plt.close()


# Survival outcome depending on whether a Cabin value is recorded, drawn as a
# stacked bar chart: one bar for rows with a Cabin value, one for rows without.
fig = plt.figure()
fig.set(alpha = 0.2)
Survived_cabin = train_df.Survived[pd.notnull(train_df.Cabin)].value_counts()
Survived_nocabin = train_df.Survived[pd.isnull(train_df.Cabin)].value_counts()
# Transposed so the x-axis is "has cabin" / "no cabin" and each bar is split
# by the value of Survived.
df = pd.DataFrame({u"有":Survived_cabin , u"无":Survived_nocabin}).transpose()
# Draw on the figure created above: without ``ax`` DataFrame.plot opens a
# figure of its own and the one above is shown as an empty window.
df.plot(kind = "bar" , stacked = True , ax = fig.add_subplot(111))
plt.title(u"按Cabin有无来看获救情况")
plt.xlabel(u"有无cabin")
plt.ylabel(u"人数")
plt.show()
plt.close()
